import { Title } from "@solidjs/meta"
import { createAsync, query, useParams } from "@solidjs/router"
import { createSignal, For, Show } from "solid-js"
import { Database, eq } from "@opencode-ai/console-core/drizzle/index.js"
import { BenchmarkTable } from "@opencode-ai/console-core/schema/benchmark.sql.js"
import { useI18n } from "~/context/i18n"

/**
 * Where a benchmark task comes from: a repository plus `from` / `to` identifiers.
 * The page renders only the first 7 characters of `from` and `to`
 * (presumably commit SHAs — TODO confirm against the benchmark producer).
 */
interface TaskSource {
  repo: string
  from: string
  to: string
}

/**
 * A single judge's verdict on one criterion.
 * The page renders a `score` of exactly 1 as "✓", exactly 0 as "✗", and any
 * other value as the raw number.
 */
interface Judge {
  score: number
  rationale: string
  judge: string
}

/** Per-criterion scoring for a run; `variance` and `judges` may be absent. */
interface ScoreDetail {
  criterion: string
  weight: number
  average: number
  variance?: number
  judges?: Judge[]
}

/**
 * Resource usage recorded for one run. `cost` is rendered with a `$` prefix and
 * four decimals; `input` / `output` are presumably token counts — TODO confirm.
 */
interface RunUsage {
  input: number
  output: number
  cost: number
}

/**
 * One execution of a task. The page shows the score as
 * `final (base - penalty)`; how `final` is actually derived is not visible here.
 */
interface Run {
  task: string
  model: string
  agent: string
  score: {
    final: number
    base: number
    penalty: number
  }
  scoreDetails: ScoreDetail[]
  usage?: RunUsage
  duration?: number
}

/** A prompt paired with a `commit` identifier (rendered truncated to 7 characters). */
interface Prompt {
  commit: string
  prompt: string
}

/** Usage averaged across runs; same shape as {@link RunUsage}. */
interface AverageUsage {
  input: number
  output: number
  cost: number
}

/**
 * Aggregated result for one task, including its optional individual runs.
 * The nested `task.id` is the key `getTaskDetail` matches on.
 */
interface Task {
  averageScore: number
  averageDuration?: number
  averageUsage?: AverageUsage
  model?: string
  agent?: string
  summary?: string
  runs?: Run[]
  task: {
    id: string
    source: TaskSource
    prompts?: Prompt[]
  }
}

/** Shape that the JSON stored in a benchmark row's `result` column is cast to. */
interface BenchmarkResult {
  averageScore: number
  tasks: Task[]
}

/**
 * Loads one task's details out of a stored benchmark result.
 *
 * Selects at most one `BenchmarkTable` row whose `id` equals `benchmarkId`,
 * parses its `result` column as JSON and looks up the task whose `task.id`
 * equals `taskId`.
 *
 * @param benchmarkId - Value matched against `BenchmarkTable.id`.
 * @param taskId - Value matched against each entry's `task.id`.
 * @returns The matching task, or `null` when the row or the task is not found.
 * @throws Whatever `JSON.parse` throws on malformed JSON, and a `TypeError` if
 *   the parsed value has no `tasks` array — the cast below does no validation.
 */
async function getTaskDetail(benchmarkId: string, taskId: string) {
  // Directive string: marks this function body for server-side execution.
  "use server"
  const rows = await Database.use((tx) =>
    tx.select().from(BenchmarkTable).where(eq(BenchmarkTable.id, benchmarkId)).limit(1),
  )
  if (!rows[0]) return null
  // NOTE(review): unchecked cast — the stored JSON is trusted to match BenchmarkResult.
  const parsed = JSON.parse(rows[0].result) as BenchmarkResult
  const task = parsed.tasks.find((t) => t.task.id === taskId)
  // `find` yields undefined on a miss; normalise to null like the missing-row case.
  return task ?? null
}

/** `getTaskDetail` wrapped by the router's `query` under the key "benchmark.task.detail". */
const queryTaskDetail = query(getTaskDetail, "benchmark.task.detail")

/**
 * Formats a millisecond duration as `"<m>m <s>s"`, or just `"<s>s"` when it is
 * under one minute.
 *
 * Sub-second remainders are dropped (`Math.floor`), so anything below 1000 ms
 * yields "0s". Minutes are never rolled over into hours, e.g. 3 600 000 ms
 * yields "60m 0s".
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration string.
 */
function formatDuration(ms: number): string {
  const seconds = Math.floor(ms / 1000)
  const minutes = Math.floor(seconds / 60)
  const remainingSeconds = seconds % 60
  if (minutes > 0) {
    return `${minutes}m ${remainingSeconds}s`
  }
  return `${remainingSeconds}s`
}

/**
 * Detail page for a single benchmark task.
 *
 * Reads the `id` route param, splits it on ":" into `benchmarkId` and `taskId`,
 * and loads the task through `queryTaskDetail` via `createAsync`.
 *
 * NOTE(review): the destructuring keeps only the first two ":"-separated
 * segments, so a task id that itself contains ":" would be cut short — confirm
 * ids never contain ":".
 * NOTE(review): the split is evaluated in the component body rather than inside
 * the `createAsync` callback; it presumably will not re-run if `params.id`
 * changes while the component stays mounted — verify.
 * NOTE(review): the returned markup below appears to have lost its JSX element
 * tags in this copy of the file; it is reproduced untouched.
 */
export default function BenchDetail() {
  const params = useParams()
  const i18n = useI18n()
  const [benchmarkId, taskId] = (params.id ?? "").split(":")
  const task = createAsync(() => queryTaskDetail(benchmarkId, taskId))
  return (
{i18n.t("bench.detail.title", { task: taskId })}
{i18n.t("bench.detail.notFound")}

}>
{i18n.t("bench.detail.labels.agent")}: {task()?.agent ?? i18n.t("bench.detail.na")}
{i18n.t("bench.detail.labels.model")}: {task()?.model ?? i18n.t("bench.detail.na")}
{i18n.t("bench.detail.labels.task")}: {task()!.task.id}
{i18n.t("bench.detail.labels.repo")}: {task()!.task.source.repo}
{i18n.t("bench.detail.labels.from")}: {task()!.task.source.from.slice(0, 7)}
{i18n.t("bench.detail.labels.to")}: {task()!.task.source.to.slice(0, 7)}
0}>
{i18n.t("bench.detail.labels.prompt")}: {(p) => (
{i18n.t("bench.detail.labels.commit")}: {p.commit.slice(0, 7)}

{p.prompt}

)}

{i18n.t("bench.detail.labels.averageDuration")}: {task()?.averageDuration ? formatDuration(task()!.averageDuration!) : i18n.t("bench.detail.na")}
{i18n.t("bench.detail.labels.averageScore")}: {task()?.averageScore?.toFixed(3) ?? i18n.t("bench.detail.na")}
{i18n.t("bench.detail.labels.averageCost")}: {task()?.averageUsage?.cost ? `$${task()!.averageUsage!.cost.toFixed(4)}` : i18n.t("bench.detail.na")}
{i18n.t("bench.detail.labels.summary")}:

{task()!.summary}

0}>
{i18n.t("bench.detail.labels.runs")}: {(detail) => ( )} {(run, index) => ( {(detail) => ( )} )}
{i18n.t("bench.detail.table.run")} {i18n.t("bench.detail.table.score")} {i18n.t("bench.detail.table.cost")} {i18n.t("bench.detail.table.duration")} {detail.criterion} ({detail.weight})
{index() + 1} {run.score.final.toFixed(3)} ({run.score.base.toFixed(3)} - {run.score.penalty.toFixed(3)}) {run.usage?.cost ? `$${run.usage.cost.toFixed(4)}` : i18n.t("bench.detail.na")} {run.duration ? formatDuration(run.duration) : i18n.t("bench.detail.na")} {(judge) => ( {judge.score === 1 ? "✓" : judge.score === 0 ? "✗" : judge.score} )}
{(run, index) => (

{i18n.t("bench.detail.run.title", { n: index() + 1 })}

{i18n.t("bench.detail.labels.score")}: {run.score.final.toFixed(3)} ({i18n.t("bench.detail.labels.base")}: {run.score.base.toFixed(3)} -{" "} {i18n.t("bench.detail.labels.penalty")}: {run.score.penalty.toFixed(3)})
{(detail) => (
{detail.criterion} ({i18n.t("bench.detail.labels.weight")}: {detail.weight}){" "} {(judge) => ( {judge.score === 1 ? "✓" : judge.score === 0 ? "✗" : judge.score} )}
0}> {(judge) => { const [expanded, setExpanded] = createSignal(false) return (
setExpanded(!expanded())} > {expanded() ? "▼" : "▶"} {judge.score === 1 ? "✓" : judge.score === 0 ? "✗" : judge.score} {" "} {judge.judge}

{judge.rationale}

) }}
)}
)}
{(() => { const [jsonExpanded, setJsonExpanded] = createSignal(false) return (
{JSON.stringify(task(), null, 2)}
) })()}
) }